/*****************

Analysis.do
--Creates some new variables and generates results, figures, and tables for the main paper.

******************/

/*When this script is executed, Stata's working directory should be set to the Project/ folder*/

*Work with union dataset
*Paths use forward slashes, which Stata accepts on Windows, macOS, and Unix (backslashes are Windows-only)

use "Data/IntermediateData/state_clean.dta", clear

*Figure 1
*Total union membership rate, Ohio (solid) vs. Kentucky (dashed), with a vertical marker at 2017 (KY RTW)
*Clear consistent decline; the KY rate rises in the years leading up to 2017, but then quickly declines, diverging from the OH rate
*There is no analogous decrease for Ohio: by 2012, the unionization rate has evened out

tsline pctmem100 if state=="OH" & sector=="Total", lpattern(solid) ///
	|| tsline pctmem100 if state=="KY" & sector=="Total", lpattern(dash) ///
	title("Union membership rates in Ohio and Kentucky, 1983-2022") ///
	ytitle("Percentage of employed workers") ///
	legend(label(1 "Ohio") label(2 "Kentucky")) ///
	tline(2017, lcolor("black")) ///
	ttext(7 2017 "KY RTW", placement("w") orient(vert)) ///
	saving(Output/Results/Figures/pctmem_OH_KY.gph, replace)

*Figure 2

*Kentucky
*Membership rate by private sector: construction (solid), manufacturing (dashed), all private (dash-dot)
*Can see the large drops after 2017 in construction; this is difficult to disentangle from the overall downward trend over the past 40 years, however
*(title typo "mebmership" corrected to "membership")

tsline pctmem100 if state=="KY" & sector=="Priv. Construction", lpattern(solid) ///
	|| tsline pctmem100 if state=="KY" & sector=="Priv. Manufacturing", lpattern(dash) ///
	|| tsline pctmem100 if state=="KY" & sector=="Private", lpattern(dash_dot) ///
	title("Union membership rates in Kentucky, by sector") ///
	ytitle("Percentage of employed workers") ///
	legend(label(1 "Priv. Construction") label(2 "Priv. Manufacturing") label(3 "Private")) ///
	tline(2017, lcolor("black")) ///
	ttext(4 2017 "KY RTW", placement("w") orient(vert)) ///
	saving(Output/Results/Figures/pctmem_KY_sector.gph, replace)

*Figure 3

*Ohio
*Membership rate by private sector: construction (solid), manufacturing (dashed), all private (dash-dot)
*All private sectors show consistent decline, and there is no divergence from the trend around 2017
*The 2017 marker is still labelled KY RTW so the Ohio plot can be read against the Kentucky one
*(title typo "mebmership" corrected to "membership")

tsline pctmem100 if state=="OH" & sector=="Priv. Construction", lpattern(solid) ///
	|| tsline pctmem100 if state=="OH" & sector=="Priv. Manufacturing", lpattern(dash) ///
	|| tsline pctmem100 if state=="OH" & sector=="Private", lpattern(dash_dot) ///
	title("Union membership rates in Ohio, by sector") ///
	ytitle("Percentage of employed workers") ///
	legend(label(1 "Priv. Construction") label(2 "Priv. Manufacturing") label(3 "Private")) ///
	tline(2017, lcolor("black")) ///
	ttext(4 2017 "KY RTW", placement("w") orient(vert)) ///
	saving(Output/Results/Figures/pctmem_OH_sector.gph, replace)

*Figure A1
*Membership (pctmem100) and coverage (pctcov100) rates for the Total sector in both states
*Coverage is always higher, but the two measures follow roughly identical trends for both states

tsline pctmem100 if state=="OH" & sector=="Total", lpattern(solid) ///
	|| tsline pctmem100 if state=="KY" & sector=="Total", lpattern(longdash_dot) ///
	|| tsline pctcov100 if state=="OH" & sector=="Total", lpattern(dash) ///
	|| tsline pctcov100 if state=="KY" & sector=="Total", lpattern(dash_dot) ///
	title("Union membership vs coverage, Ohio and Kentucky") ///
	ytitle("Percentage of employed workers") ///
	legend(label(1 "OH membership") label(2 "KY membership") label(3 "OH coverage") label(4 "KY coverage")) ///
	tline(2017, lcolor("black")) ///
	ttext(0 2017 "KY RTW", placement("s") orient(vert)) ///
	saving(Output/Results/Figures/pctmem_pctcov_OH_KY.gph, replace)


*Work with analysis dataset
*Paths use forward slashes, which Stata accepts on Windows, macOS, and Unix (backslashes are Windows-only)

use "Data/AnalysisData/ITA_state.dta", clear

*Table 1

*Summary statistics

describe

*outreg2 is a user-written command and must be installed; sum(log) exports summary statistics rather than regression output
outreg2 using "Output/Results/Tables/summary_stats.doc", label word sum(log) ///
	keep(ann_empl total_hours_worked total_dafw_days total_injuries total_resp_cond construction manufacturing private) ///
	ti(Table 1: Summary Statistics) replace

*Figure 6
*Mean injuries-per-worker by state and year
*The collapse is wrapped in preserve/restore so the establishment-level data come back afterwards

preserve

collapse (mean) inj_per_worker, by(year state)

tsline inj_per_worker if state=="OH", ytitle("Average injuries-per-worker") ///
	|| tsline inj_per_worker if state=="KY", lpattern(dash) ///
	legend(label(1 "OH") label(2 "KY")) ///
	title("Injuries per worker, 2016-2022") ///
	tline(2017, lcolor("black")) ///
	ttext(3 2017 "KY RTW", orient(vert)) ///
	saving(Output/Results/Figures/tsline_ipw_OH_KY.gph, replace)

restore


*Workplace counts by state, sector, and size class, using 2016 observations only
*contract leaves one row per state/size/sector cell with its frequency in tally;
*reshape then spreads the size classes into tally1, tally2, tally3

preserve
keep if year==2016
contract state size sector, freq(tally)
reshape wide tally, i(state sector) j(size)

*Figure 4 (Kentucky) and Figure 5 (Ohio): same stacked bar chart, one pass per state

foreach st in KY OH {
	local st_name = cond("`st'"=="KY", "Kentucky", "Ohio")
	graph bar tally1 tally2 tally3 if state=="`st'", over(sector) stack ///
		blabel(bar, size(2)) bar(1, bstyle(outline)) ///
		legend(label(1 "<20 workers") label(2 "20-249 workers") label(3 "250+ workers")) ///
		title("Number of workplaces by sector and size, `st_name'") ///
		ytitle("Frequency") ///
		saving(Output/Results/Figures/`st'_sector_size.gph, replace)
}

restore

*Tables to show if injury rates differ based on size, sector

*Table 2
*Mean injuries-per-worker with state x sector in the rows and workplace size in the columns
*(the table/collect syntax below is the collect-based one, available from Stata 17)

table ( state sector ) ( size ) (), statistic(mean inj_per_worker)
collect label dim state "State", modify
collect label dim sector "Sector", modify
collect label dim size "Size of Workplace", modify
*Remove the vertical rule on the right-hand border of the cells
collect style cell, border( right, pattern(nil) )
*Forward slashes keep the export path portable across operating systems
collect export "Output/Results/Tables/ipw_state_sector_size", as(docx) replace


*Declare the panel structure (establishment_id as the unit, year as the time variable) for the regressions
xtset establishment_id year

*DD regressions -- regressing injury measure with state/time dummies and the interaction term

*Variable lists reused by the full days-away-from-work specifications further down.
*Being locals, they only survive if the do-file is run in a single pass.
local dep_var_dafw log_total_dafw_days
local main_regressors state_dummy time_dummy_2017 state_time
local int_regressors manufacturing construction manufacturing_KY manufacturing_post manufacturing_KY_post construction_KY construction_post construction_KY_post

*2 tables -- 1 log_total_injuries, 1 log_total_dafw_days

*Paths use forward slashes, which Stata accepts on Windows, macOS, and Unix (backslashes are Windows-only)

*Column 1: baseline DD specification
regress log_total_injuries state_dummy time_dummy_2017 state_time, robust

outreg2 using "Output/Results/Tables/log_injuries_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time) ///
	ti(Table 5: Total Injuries DD Regression) replace

*Column 2: adds log_ann_empl and its _KY, _post, and _KY_post terms
regress log_total_injuries state_dummy time_dummy_2017 state_time ///
	log_ann_empl log_ann_empl_KY log_ann_empl_post log_ann_empl_KY_post, robust

outreg2 using "Output/Results/Tables/log_injuries_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time log_ann_empl) ///
	ti(Table 5: Total Injuries DD Regression) append

*Column 3: adds the manufacturing and construction terms
regress log_total_injuries state_dummy time_dummy_2017 state_time ///
	manufacturing construction ///
	manufacturing_KY manufacturing_post manufacturing_KY_post ///
	construction_KY construction_post construction_KY_post ///
	log_ann_empl log_ann_empl_KY log_ann_empl_post log_ann_empl_KY_post, robust

*Table 5

outreg2 using "Output/Results/Tables/log_injuries_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time log_ann_empl manufacturing construction) ///
	ti(Table 5: Total Injuries DD Regression) append

*F-test of joint significance of coefficients
*The tests below use the most recent estimates, i.e. the column 3 model
*NOTE(review): a line of the form "test a+b=0" tests whether the SUM of the coefficients is zero;
*a joint test that every listed coefficient is zero is written "test a b" -- confirm which the in-text p-values are meant to be

*In-text, p. 27:"F-tests of the joint significance of the variables shows that the state dummy and DD term are jointly significant (p-value: 0.015), while the treatment dummy and DD term are highly statistically insignificant (p-value: 0.682). All three regressors are highly jointly significant (p-value: 0.0051)."

log using "Output/Results/InText/log_tot_inj_Ftest.log", replace

test time_dummy_2017
test state_dummy
test state_dummy+state_time=0
test time_dummy_2017+state_time=0
test time_dummy_2017+state_dummy+state_time=0
test state_time

log close


*Days away from work

*Paths use forward slashes, which Stata accepts on Windows, macOS, and Unix (backslashes are Windows-only)

*Column 1: baseline DD specification
regress log_total_dafw_days state_dummy time_dummy_2017 state_time, robust

outreg2 using "Output/Results/Tables/log_dafw_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time) ///
	ti(Table 6: Log of Days Away DD Regression) replace

*Column 2: adds log_ann_empl and its _KY, _post, and _KY_post terms
regress log_total_dafw_days state_dummy time_dummy_2017 state_time ///
	log_ann_empl log_ann_empl_KY log_ann_empl_post log_ann_empl_KY_post, robust

outreg2 using "Output/Results/Tables/log_dafw_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time log_ann_empl) ///
	ti(Table 6: Log of Days Away DD Regression) append

*Column 3: full specification; the three locals are defined once, right after the xtset call earlier in this do-file
regress `dep_var_dafw' `main_regressors' `int_regressors' ///
	log_ann_empl log_ann_empl_KY log_ann_empl_post log_ann_empl_KY_post, robust

*Table 6

outreg2 using "Output/Results/Tables/log_dafw_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time log_ann_empl manufacturing construction) ///
	ti(Table 6: Log of Days Away DD Regression) append

*F-test of joint significance of coefficients -- test the marginal differences (sum of marginal results=0)
*The tests below use the most recent estimates, i.e. the column 3 model

*In-text, p. 27:"The same F-tests as used for the total injuries model reveal that no combination of the main regressors of interest are jointly significant."

log using "Output/Results/InText/log_tot_dafw_Ftest.log", replace

test time_dummy_2017
test state_dummy
test state_dummy+state_time=0
test time_dummy_2017+state_time=0
test time_dummy_2017+state_dummy+state_time=0
test state_time

log close


*** Models using just the Cincinnati MSA

*From here on the data in memory are restricted; observations with Cin_MSA missing are dropped as well
drop if Cin_MSA!=1

*Keep only establishments with exactly 7 observations (presumably one per year, 2016-2022 -- confirm)
bysort establishment_id: egen years_available=count(establishment_id)

drop if years_available!=7

tabulate years_available // 264 establishments in the Cincinnati MSA

drop years_available

*Table 3

describe

*Title previously carried the placeholder "Table ###"; numbered to match the Table 3 heading above
*Path uses forward slashes so it also resolves on macOS and Unix
outreg2 using "Output/Results/Tables/Cin_summary_stats.doc", label word sum(log) ///
	keep(ann_empl total_hours_worked total_dafw_days total_injuries total_resp_cond construction manufacturing private) ///
	ti(Table 3: Summary Statistics, Cincinnati MSA Only) replace

*Workplace counts by state, sector, and size class for the Cincinnati MSA sample, using 2016 observations only
*contract leaves one row per state/size/sector cell with its frequency in tally;
*reshape then spreads the size classes into tally1, tally2, tally3

preserve
keep if year==2016
contract state size sector, freq(tally)
reshape wide tally, i(state sector) j(size)

*Figure 7 (Kentucky) and Figure 8 (Ohio): same stacked bar chart, one pass per state

foreach st in KY OH {
	local st_name = cond("`st'"=="KY", "Kentucky", "Ohio")
	graph bar tally1 tally2 tally3 if state=="`st'", over(sector) stack ///
		blabel(bar, size(2)) bar(1, bstyle(outline)) ///
		legend(label(1 "<20 workers") label(2 "20-249 workers") label(3 "250+ workers")) ///
		title("Workplaces in `st_name', Cincinnati MSA Only") ///
		ytitle("Frequency") ///
		saving(Output/Results/Figures/Cin_`st'_sector_size.gph, replace)
}

restore

*Figure 9
*Mean injuries-per-worker by state and year for the Cincinnati MSA sample
*The collapse is wrapped in preserve/restore so the establishment-level data come back afterwards

preserve

collapse inj_per_worker, by(year state)

*KY is drawn dashed, matching the statewide injuries-per-worker figure, so the two series
*remain distinguishable when the figure is printed in black and white
tsline inj_per_worker if state=="OH", ytitle("Average injuries-per-worker") ///
	|| tsline inj_per_worker if state=="KY", lpattern(dash) ///
	legend(label(1 "OH") label(2 "KY")) ///
	title("Injuries per worker, Cincinnati MSA Only") ///
	tline(2017, lcolor("black")) ///
	ttext(3 2017 "KY RTW", orient(vert)) ///
	saving(Output/Results/Figures/Cin_tsline_ipw.gph, replace)

restore


*Table 4
*Mean injuries-per-worker with state x sector in the rows and workplace size in the columns,
*computed on the Cincinnati MSA sample currently in memory

table ( state sector ) ( size ) (), statistic(mean inj_per_worker)
collect label dim state "State", modify
collect label dim sector "Sector", modify
collect label dim size "Size of Workplace", modify
*Remove the vertical rule on the right-hand border of the cells
collect style cell, border( right, pattern(nil) )
*Forward slashes keep the export path portable across operating systems
collect export "Output/Results/Tables/Cin_ipw_state_sector_size", as(docx) replace


*Repeat regressions for the Cincinnati MSA

*Paths use forward slashes, which Stata accepts on Windows, macOS, and Unix (backslashes are Windows-only)

*Column 1: baseline DD specification
regress log_total_injuries state_dummy time_dummy_2017 state_time, robust

outreg2 using "Output/Results/Tables/Cin_log_injuries_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time) ///
	ti(Table 7: Total Injuries DD Regression, Cincinnati MSA Only) replace

*Column 2: adds log_ann_empl and its _KY, _post, and _KY_post terms
regress log_total_injuries state_dummy time_dummy_2017 state_time ///
	log_ann_empl log_ann_empl_KY log_ann_empl_post log_ann_empl_KY_post, robust

outreg2 using "Output/Results/Tables/Cin_log_injuries_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time log_ann_empl) ///
	ti(Table 7: Total Injuries DD Regression, Cincinnati MSA Only) append

*Column 3: adds the manufacturing and construction terms
regress log_total_injuries state_dummy time_dummy_2017 state_time ///
	manufacturing construction ///
	manufacturing_KY manufacturing_post manufacturing_KY_post ///
	construction_KY construction_post construction_KY_post ///
	log_ann_empl log_ann_empl_KY log_ann_empl_post log_ann_empl_KY_post, robust

*Table 7

outreg2 using "Output/Results/Tables/Cin_log_injuries_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time log_ann_empl manufacturing construction) ///
	ti(Table 7: Total Injuries DD Regression, Cincinnati MSA Only) append

*F-test of joint significance of coefficients -- test the marginal differences (sum of marginal results=0)
*The tests below use the most recent estimates, i.e. the column 3 model

*In-text, p. :"In both regressions, the state, time, and DD dummies are highly jointly statistically significant (p-value: 0.008 for total injuries,"

log using "Output/Results/InText/Cin_log_tot_inj_Ftest.log", replace

test time_dummy_2017
test state_dummy
test state_dummy+state_time=0
test time_dummy_2017+state_time=0
test time_dummy_2017+state_dummy+state_time=0
test state_time

log close

*Days away from work

*Paths use forward slashes, which Stata accepts on Windows, macOS, and Unix (backslashes are Windows-only)

*Column 1: baseline DD specification
regress log_total_dafw_days state_dummy time_dummy_2017 state_time, robust

outreg2 using "Output/Results/Tables/Cin_log_dafw_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time) ///
	ti(Table 8: Log of Days Away DD Regression, Cincinnati MSA Only) replace

*Column 2: adds log_ann_empl and its _KY, _post, and _KY_post terms
regress log_total_dafw_days state_dummy time_dummy_2017 state_time ///
	log_ann_empl log_ann_empl_KY log_ann_empl_post log_ann_empl_KY_post, robust

outreg2 using "Output/Results/Tables/Cin_log_dafw_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time log_ann_empl) ///
	ti(Table 8: Log of Days Away DD Regression, Cincinnati MSA Only) append

*Column 3: full specification; the three locals are defined once, right after the xtset call earlier in this do-file
regress `dep_var_dafw' `main_regressors' `int_regressors' ///
	log_ann_empl log_ann_empl_KY log_ann_empl_post log_ann_empl_KY_post, robust

*Table 8

outreg2 using "Output/Results/Tables/Cin_log_dafw_regression.doc", label word ///
	keep(state_dummy time_dummy_2017 state_time log_ann_empl manufacturing construction) ///
	ti(Table 8: Log of Days Away DD Regression, Cincinnati MSA Only) append

*F-test of joint significance of coefficients -- test the marginal differences (sum of marginal results=0)
*The tests run while the Cincinnati estimation sample is still in memory; the full dataset is reloaded only afterwards

*In-text, p. :"0.047 for total days away from work)."

log using "Output/Results/InText/Cin_log_dafw_Ftest.log", replace

test time_dummy_2017
test state_dummy
test state_dummy+state_time=0
test time_dummy_2017+state_time=0
test time_dummy_2017+state_dummy+state_time=0
test state_time

log close

*Cincinnati-only work is finished: reload the full analysis dataset for the remaining analysis
use "Data/AnalysisData/ITA_state.dta", clear

*Blinder-Oaxaca decomposition of injury rates and state
*oaxaca is a user-written command and must be installed
*One decomposition per analysis year; entries 4 and 5 of e(b) are gathered into coeffs, one column per year
*(the figure built from coeffs labels these two series Endowments and Unexplained)

*Start from a clean slate so that re-running the script does not append to a matrix left over from an earlier run
capture matrix drop coeffs

log using "Output/Results/InText/log_tot_inj_BO_test.log", replace

*In-text, p.26: "the overall difference declined (in magnitude) from -0.12 in 2016 to -0.03 in 2022."
*"Estimates of the impact of explained variation on overall differences in injury rates are insignificant for every analysis year except for 2022"

forvalues i = 2016/2022 {
	di "`i' analysis year"
	oaxaca log_total_injuries log_ann_empl construction private ///
		if year==`i', by(state_dummy) w(1)

	matrix add_`i' = (e(b)[1,4] \ e(b)[1,5])
	matrix coeffs = (nullmat(coeffs), add_`i')
}

log close

*Figure 10
*Plots the two yearly series stored in the coeffs matrix built by the Blinder-Oaxaca loop

*Transpose so that each year is a row and each component a column
matrix coeffs_transpose = coeffs'

preserve

*Collapse to one row per year so the rows created by svmat line up with the years in ascending order
*(assumes the data hold exactly the years 2016-2022 -- confirm)
*NOTE(review): empl is only a carrier variable for the collapse; confirm it exists under that name in ITA_state.dta
collapse (mean) empl, by(year)
svmat coeffs_transpose, names("portion")

*Forward slashes keep the saving() path consistent with the other figures and portable across operating systems
tsline portion1 ///
	|| tsline portion2, lpattern(dash) ///
	ytitle("Contribution to difference in group mean") ///
	title("Blinder-Oaxaca decomposition coefficients, 2016-2022") ///
	legend(label(1 "Endowments") label(2 "Unexplained")) ///
	saving("Output/Results/Figures/B_O_coeffs.gph", replace)

restore






